roey.angel@bc.cas.cz

Taxonomical analysis

This analysis explores the taxonomical distribution patters in the different samples, based on the DADA2-produced sequences. Large parts of this script are based on this protocol and the accompanying publication by Callahan and colleagues (2016).

Setting general parameters:

set.seed(1000)
min_lib_size <- 2500
data_path <- "./DADA2_pseudo/"
# Metadata_table <- "./AMetadata_decontam.csv"
# Seq_table <- "DADA2.seqtab_nochim_decontam.tsv"
# Tax_table <- "DADA2_taxa_silva_decontam.tsv"
Proj_name <- "Zin_SIP"
Ps_file <- paste0(Proj_name, "_decontam.Rds")
Seq_file <- "DADA2.reps_decontam.fa"
Var1 = "Density..g.ml.1." # e.g sampling point / replicate
Var2 = "Treatment" # e.g. a treatment or a manipulation
Var3 <- "Label..18O." # e.g. a treatment/manipulation or an important covariant
Var4 = "" # e.g. an important covariant

Load phyloseq object

Read a decontaminated phyloseq object generated by 02_Decontamination.html. Alternatively read raw data and generate a phyloseq object if 02_Decontamination.html was not run.

# # Uncomment if Decontamination wasn't run
# 
# # read OTU mat from data file
# read_tsv(paste0(data_path, Seq_table),
#          trim_ws = TRUE) %>%
#   rename_with(., ~ str_remove(.x, "_L001")) %>%
#   identity() ->
#   abundance_mat # in tibble format
# 
# # get short names of samples
# # abundance_mat %>%
# #   rownames() %>%
# #   str_remove("^Roey[0-9]{3,}-?") %>%
# #   str_split("_", simplify = T) %>%
# #   .[, 1] ->
# #   short_names
# 
# # Read metadata file
# read_csv(Metadata_table,
#                         trim_ws = TRUE) %>%
#   mutate(`ng DNA g-1` = replace(`ng DNA g-1`, which(`ng DNA g-1` == 0 | is.na(`ng DNA g-1`)), 1)) %>%  # add pseudo count
#   filter(str_remove(Read1_file, "_R1_001_noPrimers.fastq.gz") %in% colnames(abundance_mat)) %>% # remove metadata rows if the samples did not go through qual processing
#   mutate(`Library size` = colSums(select(abundance_mat, -ASV))) %>% # Add lib size
#   mutate(to_names = str_remove(Read1_file, "_R1_001_noPrimers.fastq.gz"), .before = 1) %>%
#   mutate(across(c(!!sym(Var1),
#                   !!sym(Var2),
#                   !!sym(Var3),
#                   !!sym(Var4)), ~factor(.))) %>%
#   # mutate(Identifier = paste(Site,
#   #                           `Plant species`,
#   #                           `Preservation method`,
#                             # Replicate,
#   #                           sep = "_"))  %>%   # optional. For merging samples
#   identity() ->
#   Metadata
# 
# # Order abundance_mat samples according to the metadata
# sample_order <- match(Metadata$to_names, str_remove(colnames(select(abundance_mat, -ASV)), "_L001"))
# abundance_mat %<>% select(c(1, sample_order + 1))
# 
# # read taxonomy from data file
# Raw_tax_data <- read_tsv(paste0(data_path, Tax_table),
#                         trim_ws = TRUE, col_names = TRUE)
# Raw_tax_data %<>%
#   replace(., is.na(.), "Unclassified")
# 
# # Potentially store tax classification BS values
# # Raw_tax_data %>%
# #   dplyr::select(.,
# #          `Kingdom (BS)`,
# #          `Phylum (BS)`,
# #          `Class (BS)`,
# #          `Order (BS)`,
# #          `Family (BS)`,
# #          `Genus (BS)`) %>%
# #   cbind(Name = colnames(abundance_mat),. ) ->
# #   Taxonomy.bs
# 
# # merge it downstream with the PS object
# # taxTraits <- tax_table(cbind(tax_table(Ps_obj), taxTraits))
# # Ps_obj <- merge_phyloseq(Ps_obj, taxTraits)
# # Taxonomy.bs %<>%
# #   filter(Taxonomy.bs$Name %in% row.names(Ps_obj_filt@tax_table))
# 
# Raw_tax_data %>%
#   dplyr::select(.,
#          Kingdom,
#          Phylum,
#          Class,
#          Order,
#          Family,
#          Genus) %>%
#   # map_dfr(., as_factor) %>%
#   # map_dfr(fct_expand, "Rare")  %>%
#   mutate(to_names = abundance_mat$ASV, .before = 1)-> # must be a matrix or phyloseq drops row names and gives and error
#   Taxonomy
# # row.names(Taxonomy) <- colnames(abundance_mat)
# # colnames(Taxonomy) <-
# #   c("Domain", "Phylum", "Class", "Order", "Family", "Genus")
# 
# # read sequence data
# ASV_reps <- readDNAStringSet(
#   file = paste0(data_path, Seq_file),
#   format = "fasta",
#   nrec = -1L,
#   skip = 0L,
#   seek.first.rec = FALSE,
#   use.names = TRUE)
# 
# # generate phyloseq object. **Note: only speedyseq allows constructing phyloseq from tibble!**
# Ps_obj <- phyloseq(otu_table(abundance_mat, taxa_are_rows = TRUE),
#                    sample_data(Metadata),
#                    tax_table(Taxonomy),
#                    refseq(ASV_reps))
# # rename_with_sample_data(Ps_obj, colnames(select(Metadata, -to_names)))
# 
# # merge samples in case the company re-sequenced some samples. **Note! Don't use it now to merge real DNA samples. Instead, do it downstream in one of the next scripts.**
# Ps_obj %>%
#   phyloseq_merge_samples("Read1_file") %>%
#   filter_taxa(., function(x) sum(x) > 0, TRUE) ->
#   Ps_obj_merged

# Comment if there's no RDS file
readRDS(paste0(data_path, Ps_file)) ->
  Ps_obj
Ps_obj %<>% 
  subset_samples(., !grepl(paste(c("CTRL", "NTC", "mock", "Mock", "blank"), collapse = "|"), sample_names(Ps_obj))) 

# Reorder factors for plotting
#sample_data(Ps_obj)$Spill %<>% fct_relevel("Old", after = Inf)

Exploring dataset features

Taxa-based filtering

First let’s look at the taxonomic distribution

table(tax_table(Ps_obj)[, "Kingdom"], exclude = NULL)
## 
##  Archaea Bacteria 
##       52    10048
table(tax_table(Ps_obj)[, "Class"], exclude = NULL)
## 
##           Abditibacteria           Acidimicrobiia           Acidobacteriae 
##                       60                      136                       17 
##           Actinobacteria      Alphaproteobacteria             Anaerolineae 
##                     1131                     1394                        8 
##            Armatimonadia                 Babeliae                  Bacilli 
##                       50                        2                      772 
##              bacteriap25              Bacteroidia BD2-11 terrestrial group 
##                        2                     1009                        1 
##          Bdellovibrionia           Berkelbacteria           Blastocatellia 
##                      107                       25                       29 
##          Campylobacteria               Chlamydiae             Chloroflexia 
##                        3                        7                      465 
##               Clostridia           Coriobacteriia           Cyanobacteriia 
##                      388                       12                      492 
##          Dehalococcoidia               Deinococci       Desulfitobacteriia 
##                        2                      116                        1 
##         Desulfotomaculia         Desulfovibrionia         Desulfuromonadia 
##                        2                       12                        6 
##           Dethiobacteria            Dojkabacteria            Fibrobacteria 
##                        1                        4                        3 
##           Fimbriimonadia            Fusobacteriia      Gammaproteobacteria 
##                        4                       13                     1587 
##         Gemmatimonadetes              Gitt-GS-136          Gracilibacteria 
##                      138                        1                        3 
##             Halobacteria               Holophagae             JG30-KF-CM66 
##                       31                       14                        6 
##             Kapabacteria                   KD4-96          Kiritimatiellae 
##                        5                        9                       16 
##                Kryptonia            Lentisphaeria             Limnochordia 
##                        1                       17                        2 
##            Longimicrobia                MB-A2-108          Methanobacteria 
##                      249                        4                        7 
##           Microgenomatia               Myxococcia              Nanosalinia 
##                        2                      114                        1 
##            Negativicutes          Nitrososphaeria              Nitrospiria 
##                       27                       12                        1 
##                    OLB14              Oligoflexia                    OM190 
##                        2                       74                        1 
##                   P2-11E            Parcubacteria            Phycisphaerae 
##                        1                       32                      117 
##           Planctomycetes                Polyangia             Rhodothermia 
##                      137                      127                       10 
##            Rubrobacteria  S0134 terrestrial group          Saccharimonadia 
##                      115                        1                      233 
##        Sericytochromatia                   SHA-26             Spirochaetia 
##                       23                        1                        3 
##              Subgroup 22               Sumerlaeia          Symbiobacteriia 
##                        2                        1                        1 
##              Synergistia     Thermoanaerobacteria          Thermoleophilia 
##                        1                       23                      292 
##           Thermoplasmata                     TK10             Unclassified 
##                        1                       22                      284 
##                vadinHA49         Vampirivibrionia         Verrucomicrobiae 
##                        6                        7                       48 
##         Vicinamibacteria 
##                       14
# table(tax_table(Ps_obj)[, "Family"], exclude = NULL)

Now let’s remove some taxa which are obvious artefacts or those which aren’t bacteria or archaea

domains2remove <- c("", "Eukaryota", "Unclassified")
orders2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")

Ps_obj_domains <- tax_glom(Ps_obj, "Kingdom")
Ps_obj_orders <- tax_glom(Ps_obj, "Order")
Ps_obj_families <- tax_glom(Ps_obj, "Family")

Ps_obj_tax_filt <- subset_taxa(Ps_obj, !is.na(Phylum) &
                        !Kingdom %in% domains2remove &
                      !Order %in% orders2remove &
                      !Family %in% families2remove)

sample_data(Ps_obj_tax_filt)$Library.size <- rowSums(otu_table(Ps_obj_tax_filt))

This removed:

Summary_pruned <- tibble(
  Level = c("Kingdom", "Order", "Family"),
  ASVs.removed = c(
    table(tax_table(Ps_obj)[, "Kingdom"], exclude = NULL) %>% as.data.frame() %>% .[.$Var1 == "" | .$Var1 == "Eukaryota" | .$Var1 == "Unclassified", 2] %>% sum(),
    table(tax_table(Ps_obj)[, "Order"], exclude = NULL) %>% as.data.frame() %>% .[.$Var1 == "Chloroplast", 2] %>% sum(),
    table(tax_table(Ps_obj)[, "Family"], exclude = NULL) %>% as.data.frame() %>% .[.$Var1 == "Mitochondria", 2] %>% sum()
                     ),
  Seqs.removed = c(
    psmelt(Ps_obj_domains) %>%
      group_by(Kingdom) %>%
      filter(Kingdom == "" |
               Kingdom == "Eukaryota" | Kingdom == "Unclassified") %>%
      summarise(sum = sum(Abundance)) %>% .$sum %>% sum(),
    psmelt(Ps_obj_orders) %>%
      group_by(Order) %>%
      filter(Order == orders2remove) %>%
      summarise(sum = sum(Abundance)) %>% .$sum %>% sum(),
    psmelt(Ps_obj_families) %>%
      group_by(Family) %>%
      filter(Family == families2remove) %>%
      summarise(sum = sum(Abundance)) %>% .$sum %>% sum()
    )
  )

Summary_pruned %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Level ASVs.removed Seqs.removed
Kingdom 0 0
Order 84 71604
Family 58 16054

Removed 0.7462% of the sequences.

Now let’s explore the prevalence of different taxa in the database. Prevalence is the number of samples in which a taxa appears at least once. So “Mean prevalence” refers to in how many samples does a sequence belonging to the phylum appears on average, and “Sum prevalence” is the sum of all samples where any sequence from the taxon appears.

prevdf <- apply(X = otu_table(Ps_obj_tax_filt),
                 MARGIN = ifelse(taxa_are_rows(Ps_obj_tax_filt), yes = 1, no = 2),
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
                      TotalAbundance = taxa_sums(Ps_obj_tax_filt),
                      tax_table(Ps_obj_tax_filt))

prevdf %>%
  group_by(Phylum) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_phylum_summary

Prevalence_phylum_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Phylum Mean prevalence Sum prevalence
Abditibacteriota 4.6 273
Acidobacteriota 2.9 221
Actinobacteriota 3.7 6280
Armatimonadota 4.4 290
Bacteroidota 3.2 3286
Bdellovibrionota 2.5 459
Campilobacterota 1.0 3
Chloroflexi 3.2 1768
Crenarchaeota 3.3 40
Cyanobacteria 4.9 2153
Deinococcota 4.1 473
Dependentiae 1.5 3
Desulfobacterota 1.1 20
Euryarchaeota 1.4 10
Fibrobacterota 3.3 10
Firmicutes 2.0 2400
Fusobacteriota 1.1 14
Gemmatimonadota 4.1 1617
Halobacterota 1.1 35
MBNT15 1.0 1
Myxococcota 3.6 872
Nanohaloarchaeota 1.0 1
Nitrospirota 2.0 2
Patescibacteria 3.0 907
Planctomycetota 2.3 613
Proteobacteria 2.7 8189
SAR324 clade(Marine group B) 1.8 7
Spirochaetota 1.7 5
Sumerlaeota 1.0 1
Synergistota 1.0 1
Thermoplasmatota 1.0 1
Unclassified 2.4 304
Verrucomicrobiota 1.6 140
prevdf %>%
  group_by(Order) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_order_summary

Prevalence_order_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Order Mean prevalence Sum prevalence
0319-6G20 2.3 111
0319-7L14 2.5 5
Abditibacteriales 4.6 273
Absconditabacteriales (SR1) 0.0 0
Acetobacterales 3.1 655
Acholeplasmatales 1.0 1
Acidaminococcales 1.0 1
Acidobacteriales 1.0 3
Actinomarinales 1.0 4
Actinomycetales 1.2 10
Aeromonadales 0.9 383
Alicyclobacillales 1.0 1
Alteromonadales 4.5 113
Anaerolineales 1.0 1
Aneurinibacillales 1.0 4
Ardenticatenales 1.7 5
Armatimonadales 5.0 248
Azospirillales 3.3 167
Babeliales 1.5 3
Bacillales 2.2 538
Bacteriovoracales 3.0 135
Bacteroidales 1.0 145
Bacteroidetes VC2.1 Bac22 8.0 8
Bdellovibrionales 2.3 140
Bifidobacteriales 1.4 7
Blastocatellales 4.4 101
Blfdi19 1.5 6
Bryobacterales 2.8 22
Burkholderiales 3.0 1271
Caedibacterales 0.0 0
Caenarcaniphilales 1.0 1
Caldalkalibacillales 5.2 253
Caldicellulosiruptorales 1.2 14
Caldilineales 1.0 2
Campylobacterales 1.0 3
Candidatus Adlerbacteria 1.0 1
Candidatus Daviesbacteria 1.0 1
Candidatus Doudnabacteria 8.0 8
Candidatus Kaiserbacteria 2.5 10
Candidatus Zambryskibacteria 1.3 4
Caulobacterales 3.2 283
CCD24 1.0 2
Cellvibrionales 1.5 3
Chitinophagales 2.8 289
Chlamydiales 1.0 7
Chloroflexales 2.6 88
Christensenellales 0.9 31
Chthoniobacterales 2.8 78
Clostridia UCG-014 1.0 6
Clostridia vadinBB60 group 1.0 13
Clostridiales 1.1 17
Coriobacteriales 1.2 14
Corynebacteriales 2.4 315
Coxiellales 2.0 6
Cyanobacteriales 5.3 1571
Cytophagales 4.1 2244
Deinococcales 4.1 425
Desulfitobacteriales 1.0 1
Desulfotomaculales 1.0 2
Desulfovibrionales 1.0 12
Dethiobacterales 1.0 1
Diplorickettsiales 1.1 33
Elev-16S-573 1.0 1
Elsterales 1.0 2
Enterobacterales 1.6 117
Entomoplasmatales 1.0 1
Erysipelotrichales 1.1 27
Eubacteriales 1.0 1
Euzebyales 1.9 25
EV818SWSAP88 1.5 3
Exiguobacterales 3.0 6
Ferrovibrionales 2.0 2
Fibrobacterales 3.3 10
Fimbriimonadales 1.8 7
Flavobacteriales 2.1 269
Frankiales 5.5 1866
Fusobacteriales 1.1 14
Gaiellales 2.2 56
Gammaproteobacteria Incertae Sedis 1.5 3
Gastranaerophilales 1.0 2
Gemmatales 1.2 7
Gemmatimonadales 3.8 523
Geoalkalibacteraceae 1.2 5
Geobacterales 1.0 2
Glycomycetales 1.0 3
Haliangiales 3.8 140
Halobacterales 1.1 35
Holosporales 1.0 1
Hungateiclostridiaceae 1.0 3
IMCC26256 1.2 10
Isosphaerales 2.3 249
Izemoplasmatales 1.0 6
JGI 0000069-P22 2.0 2
Kallotenuales 3.7 1112
Kapabacteriales 1.4 7
Kineosporiales 5.0 140
Kryptoniales 1.0 1
Lachnospirales 1.0 65
Lactobacillales 2.4 373
Legionellales 1.0 3
Leptolyngbyales 1.3 8
Limnochordales 1.0 2
Longimicrobiales 4.3 1071
Methanobacteriales 1.4 10
Methanomassiliicoccales 1.0 1
Methylacidiphilales 1.0 1
Micrococcales 2.6 787
Micromonosporales 4.5 190
Microtrichales 2.1 141
mle1-27 4.2 79
Monoglobales 1.0 12
MSB-4B10 1.0 1
Mycoplasmatales 1.0 1
Myxococcales 4.2 484
Nannocystales 14.7 44
Nanosalinales 1.0 1
Nitrosococcales 5.0 5
Nitrososphaerales 3.3 40
Nitrospirales 2.0 2
Obscuribacterales 0.0 0
Oceanospirillales 3.1 96
Oligoflexales 2.6 13
Oligosphaerales 1.0 1
Opitutales 1.0 4
Oscillospirales 1.0 160
Oxyphotobacteria Incertae Sedis 4.5 413
Paenibacillales 2.9 295
Paracaedibacterales 1.5 3
Parvibaculales 1.0 1
Pasteurellales 2.1 29
Pedosphaerales 1.0 4
Peptococcales 1.0 1
Peptostreptococcales-Tissierellales 1.3 80
Phycisphaerales 1.0 3
Pirellulales 1.0 19
Planctomycetales 1.8 9
PLTA13 1.0 3
Polyangiales 1.9 115
Propionibacteriales 5.1 824
Pseudomonadales 3.5 1346
Pseudonocardiales 2.6 132
Puniceispirillales 1.0 1
Pyrinomonadales 3.0 15
R7C24 1.0 1
Reyranellales 1.0 3
RF39 1.0 14
Rhizobiales 2.6 1209
Rhodobacterales 4.7 731
Rhodospirillales 1.1 8
Rhodothermales 1.9 19
Rickettsiales 1.0 6
Rubrobacterales 5.3 613
S085 1.5 3
Saccharimonadales 3.3 770
Salinisphaerales 1.0 2
SAR11 clade 1.0 1
SBR1031 1.5 3
Silvanigrellales 3.0 60
Solibacterales 1.0 4
Solirubrobacterales 3.2 829
Sphingobacteriales 3.4 303
Sphingomonadales 4.3 1293
Spirochaetales 1.7 5
Staphylococcales 2.7 269
Steroidobacterales 1.0 3
Streptomycetales 1.2 21
Streptosporangiales 3.7 59
Subgroup 13 1.0 1
Subgroup 17 1.0 1
Subgroup 2 1.0 1
Subgroup 7 4.0 56
Sumerlaeales 1.0 1
Symbiobacteriales 1.0 1
Synergistales 1.0 1
Tepidisphaerales 2.6 302
Thermales 4.2 46
Thermicanales 1.3 73
Thermoactinomycetales 1.0 2
Thermoanaerobacterales 1.9 21
Thermobaculales 3.5 49
Thermomicrobiales 3.3 384
Thermosynechococcales 4.9 59
Tistrellales 2.0 44
Unclassified 2.1 1199
Vampirovibrionales 1.0 2
Veillonellales-Selenomonadales 1.2 30
Verrucomicrobiales 1.1 10
Vibrionales 2.1 15
Vicinamibacterales 1.1 14
Victivallales 1.0 16
WCHB1-41 1.0 16
WD260 1.0 3
Xanthomonadales 1.7 178

Based on that I’ll remove all orders with a sum prevalence of under 5% (10) of all samples

Prevalence_order_summary %>% 
  filter(`Sum prevalence` < (0.05 * nsamples(Ps_obj_tax_filt))) %>% 
  dplyr::select(Order) %>% 
  map(as.character) %>% 
  unlist() ->
  filterOrder

Ps_obj_order_prev_filt <- subset_taxa(Ps_obj_tax_filt, !Order %in% filterOrder)
# Taxonomy.bs %<>% 
#   filter(Taxonomy.bs$Name %in% row.names(Ps_obj_order_prev_filt@tax_table))
sample_data(Ps_obj_order_prev_filt)$Library.size <- rowSums(otu_table(Ps_obj_order_prev_filt))
print(Ps_obj_tax_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 9958 taxa and 200 samples ]:
## sample_data() Sample Data:        [ 200 samples by 27 sample variables ]:
## tax_table()   Taxonomy Table:     [ 9958 taxa by 6 taxonomic ranks ]:
## refseq()      DNAStringSet:       [ 9958 reference sequences ]
## taxa are columns
print(Ps_obj_order_prev_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 9746 taxa and 200 samples ]:
## sample_data() Sample Data:        [ 200 samples by 27 sample variables ]:
## tax_table()   Taxonomy Table:     [ 9746 taxa by 6 taxonomic ranks ]:
## refseq()      DNAStringSet:       [ 9746 reference sequences ]
## taxa are columns

This removed 212 or 2% of the ASVs, and 0.564% of the reads.

Plot general prevalence features of the phyla

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_order_prev_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_order_prev_filt), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_order_prev_filt, "Order"))

# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot

prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)

ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_order_prev_filt), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Unsupervised filtering by prevalence

I’ll remove all sequences which appear in less than 2.5% of the samples

# Define prevalence threshold as 5% of total samples
prevalenceThreshold <- 0.025 * nsamples(Ps_obj_order_prev_filt)
prevalenceThreshold
## [1] 5
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
  row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_seq_prev_filt <- prune_taxa(keepTaxa, Ps_obj_order_prev_filt)
# Taxonomy.bs %<>% 
#   filter(Taxonomy.bs$Name %in% row.names(Ps_obj_seq_prev_filt@tax_table))
sample_data(Ps_obj_seq_prev_filt)$Library.size <- rowSums(otu_table(Ps_obj_seq_prev_filt))
print(Ps_obj_order_prev_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 9746 taxa and 200 samples ]:
## sample_data() Sample Data:        [ 200 samples by 27 sample variables ]:
## tax_table()   Taxonomy Table:     [ 9746 taxa by 6 taxonomic ranks ]:
## refseq()      DNAStringSet:       [ 9746 reference sequences ]
## taxa are columns
print(Ps_obj_seq_prev_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:          [ 1404 taxa and 200 samples ]:
## sample_data() Sample Data:        [ 200 samples by 27 sample variables ]:
## tax_table()   Taxonomy Table:     [ 1404 taxa by 6 taxonomic ranks ]:
## refseq()      DNAStringSet:       [ 1404 reference sequences ]
## taxa are columns

This removed 8342 or 86% of the ASVs! But only 16.039% of the reads.

However all these removed ASVs accounted for only:

prevdf_phylum_filt %>% 
  arrange(., Prevalence) %>% 
  group_by(Prevalence > prevalenceThreshold) %>% 
  summarise(Abundance = sum(TotalAbundance)) %>%
  mutate(`Rel. Ab.` = percent(Abundance / sum(Abundance), accuracy = 0.01)) %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Prevalence > prevalenceThreshold Abundance Rel. Ab.
FALSE 1700609 17.90%
TRUE 7802313 82.10%

So it’s fine to remove them.

General taxonomic features

Ps_obj_seq_prev_filt %>%
  subset_samples(., sample_sums(Ps_obj_seq_prev_filt) > min_lib_size) %>%
  filter_taxa(., function(x)
    sum(x) > 0, TRUE) %>%
  transform_sample_counts(., function(x)
    x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset

Ps_obj_seq_prev_filt_subset %>% 
  sample_data() %>% 
  as_tibble() %>% 
  arrange(Sample) %>% 
  pull(.sample) ->
  Sample.order

Ps_obj_seq_prev_filt_subset_100 <-
  prune_taxa(names(sort(taxa_sums(Ps_obj_seq_prev_filt_subset), TRUE)[1:100]), Ps_obj_seq_prev_filt_subset)
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_100,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  taxa.label = "Order",
  sample.order = Sample.order,
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset_firmi <-
  subset_taxa(Ps_obj_seq_prev_filt_subset, Phylum == "Firmicutes")
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_firmi,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>%
  subset_taxa(., Phylum == "Actinobacteriota") %>%
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_actino
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_actino,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>%
  subset_taxa(., Phylum == "Bacteroidota") %>%
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_bacter

plot_heatmap(
  Ps_obj_seq_prev_filt_subset_bacter,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>%
  subset_taxa(., Phylum == "Proteobacteria") %>%
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_proteo

plot_heatmap(
  Ps_obj_seq_prev_filt_subset_proteo,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Phylum",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>% 
  subset_taxa(., Class == "Cyanobacteriia") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_chitino
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_chitino,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>% 
  subset_taxa(., Order == "Chitinophagales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_chitino
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_chitino,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

Ps_obj_seq_prev_filt_subset %>% 
  subset_taxa(., Order == "Rhizobiales") %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) ->
  Ps_obj_seq_prev_filt_subset_rhizo
plot_heatmap(
  Ps_obj_seq_prev_filt_subset_rhizo,
  method = NULL,
  distance = NULL,
  sample.label = "Sample",
  sample.order = Sample.order,
  taxa.label = "Order",
  low = "#000033",
  high = "#FF3300"
)

# Ps_obj_seq_prev_filt_subset %>% 
#   subset_taxa(., Order == "Betaproteobacteriales") %>% 
#   transform_sample_counts(., function(x) x / sum(x) * 100) ->
#   Ps_obj_seq_prev_filt_subset_beta
# plot_heatmap(
#   Ps_obj_seq_prev_filt_subset_beta,
#   method = NULL,
#   distance = NULL,
#   sample.label = "Sample.name",
#   sample.order = Sample.order,
#   taxa.label = "Order",
#   low = "#000033",
#   high = "#FF3300"
# )

Let us look at the agglomerated taxa

Ps_obj_seq_prev_filt_subset %>% 
  transform_sample_counts(., function(x) x / sum(x) * 100) %>% 
  tax_glom(., "Phylum", NArm = TRUE) ->
  Ps_obj_seq_prev_glom

plot_heatmap(
  Ps_obj_seq_prev_glom,
  # method = "NMDS",
  # distance = "bray",
  sample.order = Sample.order,
  sample.label = "Sample",
  taxa.label = "Phylum",
  taxa.order = "Phylum",
  low = "#000033",
  high = "#FF3300"
) + theme_bw(base_size = 20) + theme(axis.text.x = element_text(hjust = 1.0, angle = 45.0))

Explore abundance distribution of specific taxa

Ps_obj_seq_prev_filt_subset_ra <- transform_sample_counts(Ps_obj_seq_prev_filt_subset, function(x){x / sum(x)} * 100)
plot_abundance(Ps_obj_seq_prev_filt_subset_ra, taxa = "Proteobacteria")

# plotBefore <- plot_abundance(Ps_obj_seq_prev_filt_subset, taxa = "Proteobacteria")
# plotAfter <- plot_abundance(Ps_obj_seq_prev_filt_subset_ra, taxa = "Proteobacteria")
# Combine each plot into one graphic.
# grid.arrange(nrow = 2, plotBefore, plotAfter)
Ps_obj_seq_prev_filt_subset_ra_taxa <- subset_taxa(Ps_obj_seq_prev_filt_subset_ra, Order == "Bacillales")
plot_abundance(Ps_obj_seq_prev_filt_subset_ra_taxa, Facet = "Genus")

Taxa violin plots

taxa_order <- order_taxa(Ps_obj_seq_prev_filt_subset)
Ps_obj_seq_prev_filt_subset_grouped <- mark_rare_taxa(Ps_obj_seq_prev_filt_subset, rank = "Phylum", rare_thresh = 0.01)
plot_tax_violin(Ps_obj_seq_prev_filt_subset_grouped, grouping_var1 = Var3, grouping_var2 = Var2, taxa_order = taxa_order)

Make Krona plots

dir.create(paste0(data_path, "Krona/")) # fore the tables
for (i in seq(nsamples(Ps_obj_seq_prev_filt_subset))) {
  sample_data <-
    data.frame(t(otu_table(Ps_obj_seq_prev_filt_subset)[i,]), tax_table(Ps_obj_seq_prev_filt_subset))
  sample_data <- sample_data[sample_data[, 1] > 0, ]
  write_tsv(sample_data, paste0(data_path, "Krona/", sample_names(Ps_obj_seq_prev_filt_subset)[i], ".tsv"))
}

sample_data(Ps_obj_seq_prev_filt_subset_ra)$Krona_combinations <- paste0(
  get_variable(Ps_obj_seq_prev_filt_subset_ra, Var3),
  ".",
  get_variable(Ps_obj_seq_prev_filt_subset_ra, Var2),
  ".",
  get_variable(Ps_obj_seq_prev_filt_subset_ra, Var3)
)

Ps_obj_seq_prev_filt_subset_ra %>%
  phyloseq::merge_samples(., "Krona_combinations", fun = mean) ->
  merged_Ps_obj

for (i in seq(nsamples(merged_Ps_obj))) {
  sample_data <-
    data.frame(t(otu_table(merged_Ps_obj)[i,]), tax_table(merged_Ps_obj))
  sample_data <- sample_data[sample_data[, 1] > 0, ]
  write_tsv(sample_data, paste0(data_path, "Krona/", Proj_name, ".", sample_names(merged_Ps_obj)[i], ".tsv"))
}

list.files(paste0(data_path, "Krona/"), full.names = TRUE) %>%
  paste(., collapse = " ") %>%
  paste0("/usr/local/bin/ktImportText ", .,
         " -o ",
         Proj_name,
         "_Krona.html") %>%
  system()

Save filtered phyloseq object

saveRDS(Ps_obj_tax_filt, file = paste0(data_path, Proj_name, "_tax_filt.Rds"))
# save seqs
Ps_obj_tax_filt %>%  
  refseq() %>% 
  writeXStringSet(., filepath = paste0(data_path, "DADA2_reps_tax_filt.fa"), format = "fasta", width = 1000)
saveRDS(Ps_obj_order_prev_filt, file = paste0(data_path, Proj_name, "_tax_prev_filt.Rds"))
Ps_obj_order_prev_filt %>%  
  refseq() %>% 
  writeXStringSet(., filepath = paste0(data_path, "DADA2_reps_tax_prev_filt.fa"), format = "fasta", width = 1000)
saveRDS(Ps_obj_seq_prev_filt, file = paste0(data_path, Proj_name, "_seq_prev_filt.Rds"))
Ps_obj_seq_prev_filt %>%  
  refseq() %>% 
  writeXStringSet(., filepath = paste0(data_path, "DADA2_reps_seq_prev_filt.fa"), format = "fasta", width = 1000)

# save filtered seqtab
Ps_obj_seq_prev_filt %>% 
  t() %>%
  get_taxa() %>% 
  as_tibble(rownames = "ASV") %>%
  write_tsv(., 
            paste0(data_path, Proj_name, "_seqtab_seq_prev_filt.tsv"), 
            col_names = TRUE)

Ps_obj_seq_prev_filt %>% 
  t() %>% 
  tax_table() %>% 
  as_tibble() %>%
  rename(.otu = "ASV") %>% 
  write_tsv(., 
            paste0(data_path, Proj_name, "_taxa_silva_seq_prev_filt.tsv"), 
            col_names = TRUE)
sessioninfo::session_info() %>%
  details::details(
    summary = 'Current session info',
    open    = TRUE
  )
Current session info

─ Session info ─────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.1.1 (2021-08-10)
 os       Ubuntu 18.04.6 LTS          
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       Europe/Prague               
 date     2021-10-21                  

─ Packages ─────────────────────────────────────────────────────────────────────────────
 package          * version    date       lib source                           
 ade4               1.7-18     2021-09-16 [1] CRAN (R 4.1.1)                   
 ape                5.5        2021-04-25 [1] CRAN (R 4.0.3)                   
 assertthat         0.2.1      2019-03-21 [1] CRAN (R 4.0.2)                   
 backports          1.2.1      2020-12-09 [1] CRAN (R 4.0.2)                   
 Biobase            2.52.0     2021-05-19 [1] Bioconductor                     
 BiocGenerics     * 0.38.0     2021-05-19 [1] Bioconductor                     
 biomformat         1.20.0     2021-05-19 [1] Bioconductor                     
 Biostrings       * 2.60.2     2021-08-05 [1] Bioconductor                     
 bit                4.0.4      2020-08-04 [1] CRAN (R 4.0.2)                   
 bit64              4.0.5      2020-08-30 [1] CRAN (R 4.0.2)                   
 bitops             1.0-7      2021-04-24 [1] CRAN (R 4.0.3)                   
 broom              0.7.9      2021-07-27 [1] CRAN (R 4.1.0)                   
 bslib              0.3.1      2021-10-06 [1] CRAN (R 4.1.1)                   
 cellranger         1.1.0      2016-07-27 [1] CRAN (R 4.0.2)                   
 cli                3.0.1      2021-07-17 [1] CRAN (R 4.1.0)                   
 clipr              0.7.1      2020-10-08 [1] CRAN (R 4.0.2)                   
 cluster            2.1.2      2021-04-17 [1] CRAN (R 4.0.3)                   
 codetools          0.2-18     2020-11-04 [1] CRAN (R 4.0.2)                   
 colorspace         2.0-2      2021-06-24 [1] CRAN (R 4.1.0)                   
 cowplot          * 1.1.1      2020-12-30 [1] CRAN (R 4.0.2)                   
 crayon             1.4.1      2021-02-08 [1] CRAN (R 4.0.3)                   
 data.table         1.14.2     2021-09-27 [1] CRAN (R 4.1.1)                   
 DBI                1.1.1      2021-01-15 [1] CRAN (R 4.0.3)                   
 dbplyr             2.1.1      2021-04-06 [1] CRAN (R 4.0.3)                   
 desc               1.4.0      2021-09-28 [1] CRAN (R 4.1.1)                   
 details            0.2.1      2020-01-12 [1] CRAN (R 4.0.2)                   
 digest             0.6.28     2021-09-23 [1] CRAN (R 4.1.1)                   
 dplyr            * 1.0.7      2021-06-18 [1] CRAN (R 4.1.0)                   
 ellipsis           0.3.2      2021-04-29 [1] CRAN (R 4.0.3)                   
 evaluate           0.14       2019-05-28 [1] CRAN (R 4.0.2)                   
 extrafont        * 0.17       2014-12-08 [1] CRAN (R 4.1.0)                   
 extrafontdb        1.0        2012-06-11 [1] CRAN (R 4.0.2)                   
 fansi              0.5.0      2021-05-25 [1] CRAN (R 4.0.3)                   
 farver             2.1.0      2021-02-28 [1] CRAN (R 4.0.3)                   
 fastmap            1.1.0      2021-01-25 [1] CRAN (R 4.0.3)                   
 forcats          * 0.5.1      2021-01-27 [1] CRAN (R 4.0.3)                   
 foreach            1.5.1      2020-10-15 [1] CRAN (R 4.0.2)                   
 fs                 1.5.0      2020-07-31 [1] CRAN (R 4.0.2)                   
 generics           0.1.0      2020-10-31 [1] CRAN (R 4.0.2)                   
 GenomeInfoDb     * 1.28.4     2021-09-05 [1] Bioconductor                     
 GenomeInfoDbData   1.2.6      2021-05-25 [1] Bioconductor                     
 ggplot2          * 3.3.5      2021-06-25 [1] CRAN (R 4.1.0)                   
 glue               1.4.2      2020-08-27 [1] CRAN (R 4.0.2)                   
 gtable             0.3.0      2019-03-25 [1] CRAN (R 4.0.2)                   
 haven              2.4.3      2021-08-04 [1] CRAN (R 4.1.0)                   
 highr              0.9        2021-04-16 [1] CRAN (R 4.0.3)                   
 hms                1.1.1      2021-09-26 [1] CRAN (R 4.1.1)                   
 htmltools          0.5.2      2021-08-25 [1] CRAN (R 4.1.1)                   
 httr               1.4.2      2020-07-20 [1] CRAN (R 4.0.2)                   
 igraph             1.2.7      2021-10-15 [1] CRAN (R 4.1.1)                   
 IRanges          * 2.26.0     2021-05-19 [1] Bioconductor                     
 iterators          1.0.13     2020-10-15 [1] CRAN (R 4.0.2)                   
 jquerylib          0.1.4      2021-04-26 [1] CRAN (R 4.0.3)                   
 jsonlite           1.7.2      2020-12-09 [1] CRAN (R 4.0.2)                   
 kableExtra       * 1.3.4      2021-02-20 [1] CRAN (R 4.0.3)                   
 knitr              1.36       2021-09-29 [1] CRAN (R 4.1.1)                   
 labeling           0.4.2      2020-10-20 [1] CRAN (R 4.0.2)                   
 lattice          * 0.20-45    2021-09-22 [1] CRAN (R 4.1.1)                   
 lifecycle          1.0.1      2021-09-24 [1] CRAN (R 4.1.1)                   
 lubridate          1.8.0      2021-10-07 [1] CRAN (R 4.1.1)                   
 magrittr         * 2.0.1      2020-11-17 [1] CRAN (R 4.0.2)                   
 MASS               7.3-54     2021-05-03 [1] CRAN (R 4.0.3)                   
 Matrix             1.3-4      2021-06-01 [1] CRAN (R 4.1.0)                   
 mgcv               1.8-38     2021-10-06 [1] CRAN (R 4.1.1)                   
 modelr             0.1.8      2020-05-19 [1] CRAN (R 4.0.2)                   
 multtest           2.48.0     2021-05-19 [1] Bioconductor                     
 munsell            0.5.0      2018-06-12 [1] CRAN (R 4.0.2)                   
 nlme               3.1-153    2021-09-07 [1] CRAN (R 4.1.1)                   
 permute          * 0.9-5      2019-03-12 [1] CRAN (R 4.0.2)                   
 phyloseq         * 1.36.0     2021-05-19 [1] Bioconductor                     
 pillar             1.6.4      2021-10-18 [1] CRAN (R 4.1.1)                   
 pkgconfig          2.0.3      2019-09-22 [1] CRAN (R 4.0.2)                   
 plyr               1.8.6      2020-03-03 [1] CRAN (R 4.0.2)                   
 png                0.1-7      2013-12-03 [1] CRAN (R 4.0.2)                   
 purrr            * 0.3.4      2020-04-17 [1] CRAN (R 4.0.2)                   
 R6                 2.5.1      2021-08-19 [1] CRAN (R 4.1.1)                   
 ragg               1.1.3      2021-06-09 [1] CRAN (R 4.1.0)                   
 Rcpp               1.0.7      2021-07-07 [1] CRAN (R 4.1.0)                   
 RCurl              1.98-1.5   2021-09-17 [1] CRAN (R 4.1.1)                   
 readr            * 2.0.2      2021-09-27 [1] CRAN (R 4.1.1)                   
 readxl             1.3.1      2019-03-13 [1] CRAN (R 4.0.2)                   
 reprex             2.0.1      2021-08-05 [1] CRAN (R 4.1.0)                   
 reshape2           1.4.4      2020-04-09 [1] CRAN (R 4.0.2)                   
 rhdf5              2.36.0     2021-05-19 [1] Bioconductor                     
 rhdf5filters       1.4.0      2021-05-19 [1] Bioconductor                     
 Rhdf5lib           1.14.2     2021-07-06 [1] Bioconductor                     
 rlang              0.4.12     2021-10-18 [1] CRAN (R 4.1.1)                   
 rmarkdown          2.11       2021-09-14 [1] CRAN (R 4.1.1)                   
 rprojroot          2.0.2      2020-11-15 [1] CRAN (R 4.0.2)                   
 rstudioapi         0.13       2020-11-12 [1] CRAN (R 4.0.2)                   
 Rttf2pt1           1.3.9      2021-07-22 [1] CRAN (R 4.1.0)                   
 rvest              1.0.2      2021-10-16 [1] CRAN (R 4.1.1)                   
 S4Vectors        * 0.30.2     2021-10-03 [1] Bioconductor                     
 sass               0.4.0      2021-05-12 [1] CRAN (R 4.0.3)                   
 scales           * 1.1.1      2020-05-11 [1] CRAN (R 4.0.2)                   
 see              * 0.6.8      2021-10-03 [1] CRAN (R 4.1.1)                   
 sessioninfo        1.1.1      2018-11-05 [1] CRAN (R 4.0.2)                   
 speedyseq        * 0.5.3.9018 2021-08-11 [1] Github (mikemc/speedyseq@ceb941f)
 stringi            1.7.5      2021-10-04 [1] CRAN (R 4.1.1)                   
 stringr          * 1.4.0      2019-02-10 [1] CRAN (R 4.0.2)                   
 survival           3.2-13     2021-08-24 [1] CRAN (R 4.1.1)                   
 svglite          * 2.0.0      2021-02-20 [1] CRAN (R 4.1.0)                   
 systemfonts        1.0.3      2021-10-13 [1] CRAN (R 4.1.1)                   
 textshaping        0.3.6      2021-10-13 [1] CRAN (R 4.1.1)                   
 tibble           * 3.1.5      2021-09-30 [1] CRAN (R 4.1.1)                   
 tidyr            * 1.1.4      2021-09-27 [1] CRAN (R 4.1.1)                   
 tidyselect         1.1.1      2021-04-30 [1] CRAN (R 4.0.3)                   
 tidyverse        * 1.3.1      2021-04-15 [1] CRAN (R 4.0.3)                   
 tzdb               0.1.2      2021-07-20 [1] CRAN (R 4.1.0)                   
 utf8               1.2.2      2021-07-24 [1] CRAN (R 4.1.0)                   
 vctrs              0.3.8      2021-04-29 [1] CRAN (R 4.0.3)                   
 vegan            * 2.5-7      2020-11-28 [1] CRAN (R 4.0.3)                   
 viridisLite        0.4.0      2021-04-13 [1] CRAN (R 4.0.3)                   
 vroom              1.5.5      2021-09-14 [1] CRAN (R 4.1.1)                   
 webshot            0.5.2      2019-11-22 [1] CRAN (R 4.0.2)                   
 withr              2.4.2      2021-04-18 [1] CRAN (R 4.0.3)                   
 xfun               0.27       2021-10-18 [1] CRAN (R 4.1.1)                   
 xml2               1.3.2      2020-04-23 [1] CRAN (R 4.0.2)                   
 XVector          * 0.32.0     2021-05-19 [1] Bioconductor                     
 yaml               2.2.1      2020-02-01 [1] CRAN (R 4.0.2)                   
 zlibbioc           1.38.0     2021-05-19 [1] Bioconductor                     

[1] /home/angel/R/library
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library


## References
Callahan BJ, Sankaran K, Fukuyama JA et al. Bioconductor workflow for microbiome data analysis: From raw reads to community analyses. F1000Research 2016;5:1492.